In [1]:
import os

## Set directory
## NOTE(review): hardcoded absolute HPC path — not portable; consider a
## configurable DATA_DIR.
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')

import networkx as nx
## Star import presumably supplies torch, sc, pd, np, pickle, plt,
## Root_Dataset, DataLoader, ClassifierLSTM, etc. — confirm in genesys_evaluate.
from genesys_evaluate import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress deprecation warnings only (other warning categories still shown)
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [2]:
## Conda Env pytorch-gpu on DCC
## Record library versions for reproducibility (both names come from the
## star import above; `sc` is presumably scanpy — confirm)
print(torch.__version__)
print(sc.__version__) 
1.13.0.post200
1.9.1
In [3]:
## Genes considered/used (shared among samples) 
## The 'features' column holds GeneIDs; it is indexed by later cells
gene_list = pd.read_csv('./gene_list_1108.csv')

Load Data¶

In [4]:
## NOTE(review): pickle.load runs arbitrary code — only load trusted files
with open("./genesys_root_data.pkl", 'rb') as file_handle:
    data = pickle.load(file_handle)
    
## Batch size also serves as the number of cells sampled per draw below
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
## NOTE(review): shuffle=True with no seed — batch composition (and hence the
## averaged GRNs below) is not reproducible across runs; consider seeding.
loader = DataLoader(dataset,
                         batch_size = batch_size,
                         shuffle = True, drop_last=True)
In [5]:
input_size = data['X_train'].shape[1]   # number of genes (17513 per the loaded model's fc1 layer)
## 10 cell types 
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
## CPU-only inference; checkpoint weights are mapped to CPU when loaded below
device = "cpu"
path = "./"   # directory holding the trained checkpoint

Load trained GeneSys model¶

In [6]:
## Instantiate the classifier and load the trained checkpoint (CPU-only).
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"best_ALL_1130_continue.pth", map_location=torch.device('cpu')))
## (removed redundant no-op `model = model`)
## eval() disables dropout/GaussianNoise for deterministic inference; being the
## cell's last expression it also displays the module summary (Out[6]).
model.eval()
Out[6]:
ClassifierLSTM(
  (fc1): Sequential(
    (0): Linear(in_features=17513, out_features=256, bias=True)
    (1): Dropout(p=0.2, inplace=False)
    (2): GaussianNoise()
  )
  (fc): Sequential(
    (0): ReLU()
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=10, bias=True)
  )
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (b_to_z): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (bz2_infer_z1): DBlock(
    (fc1): Linear(in_features=1024, out_features=256, bias=True)
    (fc2): Linear(in_features=1024, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z1_to_z2): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z_to_x): Decoder(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=17513, bias=True)
  )
)
In [7]:
## Cell-type labels, ordered by the model's output index (0-9)
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
## Forward and reverse lookups between class index and label
num2class = dict(enumerate(classes))
class2num = {name: idx for idx, name in num2class.items()}
In [8]:
## Cell types ordered epidermis -> columella; this order defines network() indices
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
## One gene-by-gene GRN matrix per cell type (17513 genes).
## NOTE(review): ~24.5 GB in float64 (10 x 17513^2 x 8 bytes) — confirm the
## node has enough RAM.
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [9]:
## GRN for the transition t5 to t7
## For each cell type: generate expression at pseudotime steps 5 and 7 from the
## same input cells, then solve the least-squares system  cfrom @ W ~= cto,
## so W acts as a gene-by-gene transition (GRN) matrix for that cell type.
for ct in cts:
    print(ct)
    ## One candidate W per batch; averaged across batches below
    cws = np.zeros((len(loader), 17513, 17513))
    with torch.no_grad():
        for i, sample in enumerate(loader):
            x = sample['x'].to(device)
            y = sample['y'].to(device)
            ## Map numeric labels back to cell-type names (the comprehension's
            ## `i` is scoped to the comprehension in Python 3 — no clobbering)
            y_label = [num2class[i] for i in y.tolist()]
            
            ## Step-5 expression; keep only cells of the current cell type
            pred_h = model.init_hidden(batch_size)
            tfrom = model.generate_next(x, pred_h, 5).to('cpu').detach().numpy()
            cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
            
            ## Step-7 expression for the same subset of cells
            pred_h = model.init_hidden(batch_size)
            tto = model.generate_next(x, pred_h, 7).to('cpu').detach().numpy()   
            cto = tto[np.where(np.array(y_label)==ct)[0],:]
            
            ## Least-squares fit of the transition matrix.
            ## NOTE(review): if a batch has no cells of this type the slices are
            ## empty — confirm lstsq behaves as intended in that case.
            cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
            cws[i] = cw
    
    ## Calculate mean across number of repeats
    cwm = np.mean(cws, axis=0)
    ctw[cts.index(ct)] = cwm
Atrichoblast
Trichoblast
Cortex
Endodermis
Pericycle
Procambium
Xylem
Phloem
Lateral Root Cap
Columella
In [10]:
# Save the array to disk (roughly 24.5 GB in float64; lets later runs skip the loop)
np.save('genesys_ctw_t5-t7.npy', ctw)
In [9]:
ctw = np.load('genesys_ctw_t5-t7.npy')
In [10]:
## Calculate z-scores
## Standardize each cell type's GRN matrix to zero mean / unit variance
ctw_z = np.zeros_like(ctw)
for idx in range(len(cts)):
    mat = ctw[idx]
    ctw_z[idx] = (mat - mat.mean()) / mat.std()
In [11]:
## Filtering based on z-scores (boolean mask stored as 0/1 floats)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold: keep entries with |z| > threshold, i.e. BOTH tails
## (strong positive and strong negative coefficients)
threshold=3
for i in range(len(cts)):
    ctw_f[i] = np.abs(ctw_z[i]) > threshold

Load TFs list¶

In [12]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [13]:
## Make TF names unique and assign preferred names.
## Rewritten with .loc: the original chained assignment
## (wanted_TFs['Name'][mask] = v) triggers SettingWithCopyWarning and is a
## silent no-op under pandas >= 2 copy-on-write.
preferred_names = {
    "AT2G33880": "WOX9",
    "AT2G45160": "SCL27",
    "AT5G04410": "NAC78",
    "AT3G29035": "ORS1",
    "AT2G02540": "ZHD3",
    "AT3G16500": "IAA26",
    "AT5G09740": "HAG5",
    "AT4G24660": "ZHD2",
    "AT5G46880": "HDG5",
    "AT1G28420": "RLT1",
    "AT1G14580": "BLJ",
    "AT3G45260": "BIB",
    "AT2G02070": "RVN",
    "AT2G28160": "FIT",
    "AT1G68360": "GIS3",
    "AT1G20640": "NLP4",
    "AT5G05550": "VFP5",
    "AT3G59470": "FRF1",
    "AT5G15150": "HAT7",
    "AT5G14750": "WER",
    "AT1G75710": "BRON",
    "AT1G74500": "TMO7",
    "AT2G12646": "RITF1",
    "AT3G48100": "ARR5",
    "AT4G16141": "GATA17L",
    "AT5G65640": "NFL",
    "AT1G62700": "VND5",
    "AT4G36160": "VND2",
    "AT5G66300": "VND3",
    "AT1G12260": "VND4",
    "AT5G62380": "VND6",
}
for gene_id, name in preferred_names.items():
    wanted_TFs.loc[wanted_TFs['GeneID'] == gene_id, 'Name'] = name
In [14]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[14]:
NAC001    1
PRE5      1
MYB118    1
MYB21     1
MYB0      1
Name: Name, dtype: int64

Network analysis¶

In [15]:
## Indices (into gene_list) of the TFs that are present in the modeled gene set.
features = gene_list['features']
## Hoisted: the original rebuilt gene_list['features'].tolist() on every
## iteration (O(n*m)); a set makes membership O(1).
feature_set = set(features.tolist())
TFidx = []
for gene_id in wanted_TFs['GeneID']:
    if gene_id in feature_set:
        TFidx.append(np.where(features == gene_id)[0][0])

TFidx = np.sort(np.array(TFidx))
In [16]:
def network(i):
    """Build the TF-TF network for cell type ``i`` (index into ``cts``), plot it,
    and return a DataFrame of per-TF centrality measures sorted by betweenness.
    """
    ## Boolean (unweighted) and weighted adjacency for this cell type
    adj_nw = ctw_f[i]
    adj = ctw[i]*ctw_f[i]
    ## Restrict both matrices to TF rows/columns only
    adj = adj[np.ix_(TFidx,TFidx)]
    adj_nw = adj_nw[np.ix_(TFidx,TFidx)]

    ## Drop unconnected TFs: rows with at least one outgoing edge (regulators)
    ## and columns with at least one incoming edge (targets).
    regidx = np.unique(np.where(adj_nw==True)[0])
    taridx = np.unique(np.where(adj_nw==True)[1])
    ## Reciprocal: keep TFs that act as BOTH regulator and target
    keepidx = np.intersect1d(regidx, taridx)
    #keepidx = np.union1d(regidx, taridx)  # alternative: either role

    ## GeneIDs of the kept TFs (currently unused; retained for reference)
    TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
    ## Human-readable names of the kept TFs
    TFname = []
    for gene in np.array(gene_list['features'][TFidx])[keepidx]:
        TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==gene)[0][0]])

    adj = adj[np.ix_(keepidx,keepidx)]

    # --- Undirected graph: closeness / eigenvector centrality ---------------
    G = nx.Graph()  # undirected, simple graph (fixed comment: nx.Graph is NOT directed)
    num_nodes = adj.shape[0]
    for node, name in enumerate(TFname):
        G.add_node(node, name=name)
    ## Edge weight = |coefficient|; 'distance' = 1/|w| so stronger links are shorter
    for src in range(num_nodes):
        for dst in range(num_nodes):
            weight = adj[src, dst]
            if weight != 0:
                G.add_edge(src, dst, weight=abs(weight), distance=1/abs(weight))

    ## How close a node is to all others along (1/|w|)-weighted shortest paths
    closeness_centrality = nx.closeness_centrality(G, distance='distance')
    ## Connectedness to other well-connected nodes (unweighted here)
    eigenvector_centrality = nx.eigenvector_centrality(G)

    # --- Directed multigraph: degree / in / out / betweenness ---------------
    G = nx.MultiDiGraph()  # directed; allows parallel edges between the same pair
    num_nodes = adj.shape[0]
    for node, name in enumerate(TFname):
        G.add_node(node, name=name)
    ## Signed weights this time (sign encodes activation vs repression downstream)
    for src in range(num_nodes):
        for dst in range(num_nodes):
            weight = adj[src, dst]
            if weight != 0:
                G.add_edge(src, dst, weight=weight)

    ## Number of connections (in + out) per node, normalized
    degree_centrality = nx.degree_centrality(G)
    out_centrality = nx.out_degree_centrality(G)
    in_centrality = nx.in_degree_centrality(G)
    ## Extent to which a node lies on shortest paths between other nodes.
    ## NOTE(review): weights are signed here; shortest-path betweenness with
    ## negative edge weights is not well defined — confirm this is intended.
    betweenness_centrality = nx.betweenness_centrality(G, weight='weight')

    # --- Plot: node colour/size encode out-degree centrality ----------------
    pos = nx.spring_layout(G)
    node_colors = [out_centrality[node] for node in G.nodes()]
    node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]

    ## Iterate edges with their data directly: get_edge_attributes on a
    ## MultiDiGraph is keyed by (u, v, key) 3-tuples, so indexing it with the
    ## (u, v) pairs from G.edges (as the original did) raises KeyError.
    edge_data = list(G.edges(data='weight'))
    edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in edge_data]
    max_weight = max(weight for (_, _, weight) in edge_data)
    edge_widths = [float(weight) / max_weight for (_, _, weight) in edge_data]

    nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
    labels = {node: G.nodes[node]['name'] for node in G.nodes}
    nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)

    ## Colorbar for the out-centrality colour mapping; ax must be passed
    ## explicitly on matplotlib >= 3.6 when the mappable has no axes
    sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
    sm.set_array([])
    plt.colorbar(sm, ax=plt.gca())

    plt.show()

    ## Assemble the per-TF centrality table, indexed by TF name
    dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
    oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
    ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
    bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
    cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
    ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
    df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
    df.index = TFname
    df = df.sort_values('betweenness_centrality', ascending=False)

    return df
In [17]:
## Build and plot the network for each cell type; indices follow `cts` order
atri = network(0)
In [18]:
tri = network(1)
In [19]:
cor = network(2)
In [20]:
end = network(3)
In [21]:
per = network(4)
In [22]:
pro = network(5)
In [23]:
xyl = network(6)
In [24]:
phl = network(7)
In [25]:
lrc = network(8)
In [26]:
col = network(9)
In [27]:
## Prefix each centrality table's columns with its cell-type abbreviation.
## (loop replaces ten copy-pasted assignments; same columns, same order)
metrics = ['degree_centrality', 'out_centrality', 'in_centrality',
           'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality']
ct_tables = {'atri': atri, 'tri': tri, 'cor': cor, 'end': end, 'per': per,
             'pro': pro, 'xyl': xyl, 'phl': phl, 'lrc': lrc, 'col': col}
for prefix, table in ct_tables.items():
    table.columns = [f'{prefix}_{m}' for m in metrics]
In [28]:
## Identify main regulators in each network: TFs with nonzero betweenness.
## (loop replaces ten copy-pasted lines; concatenation order is preserved so
## that value_counts tie-breaking is unchanged)
tff = []
for prefix, table in [('atri', atri), ('tri', tri), ('lrc', lrc), ('cor', cor),
                      ('end', end), ('per', per), ('pro', pro), ('xyl', xyl),
                      ('phl', phl), ('col', col)]:
    tff = tff + table[table[prefix + '_betweenness_centrality'] > 0].index.tolist()
## How many cell-type networks each TF regulates in.
## NOTE(review): the column name 'tf_occurance' (sic) is referenced by later
## cells — keep the spelling. Also, in pandas >= 2 value_counts returns a
## Series named 'count'; pd.DataFrame(series, columns=[...]) may then yield
## NaN — consider .rename('tf_occurance').to_frame() if upgrading.
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts(), columns=['tf_occurance'])
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [29]:
## Epidermis (atri, tri, lrc)
## TFs regulating in exactly these three networks (tf_occurance == 3)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
## Count nonzero centralities per TF (max 9 = 3 measures x 3 cell types)
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): the sum includes the just-added centrality_count column
## (hence values ~= count + centralities) — confirm this offset is intended.
ts['centrality_sum'] = ts.sum(axis=1)
## TFs with all nine centralities nonzero, ranked by total
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[29]:
atri_betweenness_centrality tri_betweenness_centrality lrc_betweenness_centrality atri_out_centrality tri_out_centrality lrc_out_centrality atri_in_centrality tri_in_centrality lrc_in_centrality centrality_count centrality_sum
ATS 0.000007 0.002572 0.571055 0.161039 0.202505 0.171717 0.01039 0.002088 0.216162 9 10.337534
In [30]:
## atri, tri
## TFs regulating in exactly two networks, viewed for the hair/non-hair pair
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[30]:
atri_betweenness_centrality tri_betweenness_centrality atri_out_centrality tri_out_centrality atri_in_centrality tri_in_centrality centrality_count centrality_sum
LRL3 0.219183 0.992239 0.057143 0.933194 0.480519 0.889353 6 9.571631
ARR5 0.915294 0.929036 0.187013 0.300626 0.485714 0.298539 6 9.116222
RHD6 0.662223 0.729090 0.129870 0.622129 0.225974 0.231733 6 8.601019
AT4G09100 0.703051 0.616133 0.103896 0.215031 0.129870 0.342380 6 8.110361
RSL4 0.000027 0.712620 0.033766 0.073069 0.025974 0.496868 6 7.342325
WRKY61 0.207082 0.202973 0.054545 0.087683 0.371429 0.175365 6 7.099077
AT2G37120 0.001508 0.640761 0.031169 0.173278 0.054545 0.173278 6 7.074539
NAC084 0.308185 0.138975 0.051948 0.091858 0.067532 0.100209 6 6.758707
WRKY70 0.000372 0.032285 0.114286 0.098121 0.109091 0.152401 6 6.506555
HRS1 0.051197 0.004145 0.064935 0.064718 0.176623 0.112735 6 6.474354
NAC053 0.044995 0.003066 0.085714 0.037578 0.109091 0.039666 6 6.320110
WRKY47 0.007305 0.004271 0.020779 0.043841 0.153247 0.035491 6 6.264935
In [31]:
## Atrichoblast specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[31]:
atri_betweenness_centrality atri_out_centrality atri_in_centrality centrality_count centrality_sum
HB17 0.892769 0.166234 0.384416 3 4.443419
AT3G05860 0.887994 0.020779 0.142857 3 4.051630
MYB50 0.701035 0.127273 0.233766 3 4.062074
ATL9 0.059490 0.036364 0.007792 3 3.103646
KAN 0.016545 0.153247 0.181818 3 3.351610
MEA 0.011249 0.187013 0.067532 3 3.265794
FIT 0.010708 0.119481 0.059740 3 3.189928
AT5G22890 0.006358 0.036364 0.220779 3 3.263501
HB30 0.005147 0.059740 0.031169 3 3.096057
AT4G31650 0.003220 0.244156 0.018182 3 3.265557
NLP7 0.002584 0.007792 0.057143 3 3.067519
In [32]:
## Trichoblast specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[32]:
tri_betweenness_centrality tri_out_centrality tri_in_centrality centrality_count centrality_sum
RSL2 0.930202 0.283925 0.557411 3 4.771539
AT3G53370 0.929967 0.135699 0.515658 3 4.581324
RAP2.11 0.739096 0.177453 0.167015 3 4.083564
WRKY31 0.732702 0.085595 0.219207 3 4.037504
AT5G56200 0.728326 0.206681 0.118998 3 4.054005
AT5G04390 0.044527 0.039666 0.041754 3 3.125947
AT2G05160 0.016461 0.012526 0.137787 3 3.166774
KDR 0.012155 0.198330 0.018789 3 3.229274
HFR1 0.002232 0.041754 0.091858 3 3.135844
RL6 0.001572 0.110647 0.018789 3 3.131009
WRKY3 0.001022 0.035491 0.025052 3 3.061565
GATA12 0.000777 0.048017 0.018789 3 3.067583
AT3G05760 0.000358 0.077244 0.027140 3 3.104742
In [33]:
## LRC specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[33]:
lrc_betweenness_centrality lrc_out_centrality lrc_in_centrality centrality_count centrality_sum
AT1G26680 0.874633 0.448485 0.177778 3 4.500896
GATA2 0.846436 0.371717 0.470707 3 4.688860
ERF9 0.648284 0.139394 0.054545 3 3.842224
OFP6 0.619188 0.270707 0.319192 3 4.209087
IAA1 0.311900 0.117172 0.161616 3 3.590688
LBD4 0.310735 0.018182 0.050505 3 3.379422
ANL2 0.275287 0.064646 0.228283 3 3.568217
RITF1 0.255212 0.105051 0.032323 3 3.392586
NAC094 0.123989 0.301010 0.026263 3 3.451262
HDG1 0.044485 0.032323 0.024242 3 3.101051
AT2G42300 0.013058 0.153535 0.060606 3 3.227199
GATA17L 0.011148 0.151515 0.008081 3 3.170744
SPT 0.009193 0.006061 0.175758 3 3.191011
BZIP34 0.008911 0.090909 0.028283 3 3.128103
AT2G37520 0.007063 0.018182 0.002020 3 3.027265
GATA4 0.003848 0.141414 0.024242 3 3.169505
RBR1 0.002826 0.111111 0.038384 3 3.152321
GRF2 0.002801 0.044444 0.125253 3 3.172498
FEZ 0.001979 0.311111 0.008081 3 3.321171
GRF3 0.001808 0.062626 0.032323 3 3.096757
PYE 0.000458 0.096970 0.058586 3 3.156014
CHR38 0.000094 0.115152 0.024242 3 3.139488
CSDP1 0.000057 0.042424 0.064646 3 3.107128
WRI1 0.000033 0.125253 0.022222 3 3.147507
DAR7 0.000008 0.028283 0.030303 3 3.058594
In [34]:
## Columella specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[34]:
col_betweenness_centrality col_out_centrality col_in_centrality centrality_count centrality_sum
NAM 0.899677 0.157377 0.316393 3 4.373447
SNI1 0.395521 0.032787 0.291803 3 3.720111
ARF10 0.165372 0.219672 0.111475 3 3.496519
AGL18 0.091976 0.060656 0.026230 3 3.178861
SCL1 0.037546 0.040984 0.183607 3 3.262136
WRKY72 0.003925 0.067213 0.068852 3 3.139990
AT5G23405 0.003279 0.024590 0.095082 3 3.122951
PRR7 0.003276 0.032787 0.062295 3 3.098358
AT5G06110 0.003276 0.014754 0.036066 3 3.054096
LUG 0.003265 0.032787 0.080328 3 3.116380
AT2G29065 0.002678 0.065574 0.031148 3 3.099400
TRP1 0.001637 0.054098 0.040984 3 3.096719
AT4G22360 0.001637 0.068852 0.068852 3 3.139342
AT3G02890 0.000676 0.032787 0.057377 3 3.090840
ATRX 0.000380 0.109836 0.029508 3 3.139724
AT1G74250 0.000194 0.057377 0.032787 3 3.090358
SYD 0.000151 0.067213 0.054098 3 3.121462
WRKY35 0.000148 0.075410 0.073770 3 3.149328
SPL14 0.000075 0.098361 0.029508 3 3.127944
EIN3 0.000005 0.060656 0.114754 3 3.175415
In [35]:
## Ground tissue
## TFs regulating in exactly two networks, viewed for the cortex/endodermis pair
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[35]:
cor_betweenness_centrality end_betweenness_centrality cor_out_centrality end_out_centrality cor_in_centrality end_in_centrality centrality_count centrality_sum
AT1G05710 0.134613 2.462136e-02 0.361702 0.378352 0.691489 0.522031 6 8.112809
JKD 0.001363 7.880488e-03 0.588652 0.287356 0.251773 0.218391 6 7.355416
LAF1 0.030085 4.995904e-03 0.173759 0.103448 0.638298 0.139847 6 7.090433
AT3G61420 0.002675 1.012038e-03 0.180851 0.107280 0.574468 0.167625 6 7.033911
AtHB23 0.000467 1.809179e-04 0.606383 0.117816 0.187943 0.026820 6 6.939610
AT3G24120 0.030388 1.258160e-03 0.216312 0.096743 0.297872 0.173372 6 6.815945
JAZ6 0.000517 5.051006e-05 0.191489 0.045977 0.485816 0.051724 6 6.775574
MYB122 0.000038 2.981012e-03 0.049645 0.269157 0.063830 0.384100 6 6.769751
IDD4 0.000315 3.122440e-05 0.156028 0.081418 0.290780 0.082375 6 6.610948
AT1G64380 0.000215 2.755094e-06 0.131206 0.057471 0.308511 0.069923 6 6.567328
HB6 0.000126 6.851001e-04 0.109929 0.134100 0.127660 0.157088 6 6.529588
COL4 0.000480 5.051006e-05 0.148936 0.061303 0.212766 0.076628 6 6.500163
LRP1 0.001010 3.581622e-05 0.092199 0.042146 0.308511 0.033525 6 6.477425
AGL42 0.000050 7.447938e-04 0.046099 0.168582 0.035461 0.204023 6 6.454961
AT4G36860 0.001426 1.668669e-03 0.074468 0.106322 0.102837 0.159962 6 6.446683
ERF15 0.004404 6.428553e-05 0.202128 0.105364 0.039007 0.059387 6 6.410354
HAM3 0.000025 1.432649e-03 0.134752 0.031609 0.156028 0.009579 6 6.333426
LCL1 0.000050 5.510188e-06 0.081560 0.074713 0.039007 0.079502 6 6.274838
ULT1 0.000038 1.957035e-03 0.177305 0.046935 0.031915 0.008621 6 6.266770
AT3G61180 0.001186 3.122440e-05 0.024823 0.037356 0.138298 0.042146 6 6.243840
BEH2 0.002612 3.434684e-04 0.159574 0.037356 0.014184 0.016284 6 6.230354
PTF1 0.000063 5.510188e-06 0.042553 0.055556 0.060284 0.065134 6 6.223595
NGA3 0.003811 5.234679e-05 0.042553 0.063218 0.007092 0.083333 6 6.200061
AT5G23280 0.000757 5.354066e-04 0.109929 0.036398 0.017730 0.011494 6 6.176845
AT1G21580 0.000050 1.763260e-04 0.049645 0.019157 0.088652 0.019157 6 6.176839
AT2G06025 0.000025 9.183647e-07 0.010638 0.045019 0.039007 0.052682 6 6.147373
MYB14 0.001073 7.897937e-05 0.021277 0.022989 0.070922 0.017241 6 6.133580
AT2G27580 0.000139 1.377547e-04 0.021277 0.033525 0.035461 0.037356 6 6.127895
AT5G16680 0.000189 1.193874e-05 0.031915 0.028736 0.035461 0.030651 6 6.126964
NTM1 0.036281 2.938767e-04 0.010638 0.019157 0.031915 0.022031 6 6.120316
SHL1 0.000114 1.836729e-06 0.014184 0.028736 0.049645 0.025862 6 6.118543
OFP12 0.002145 1.154384e-03 0.063830 0.028736 0.014184 0.004789 6 6.114839
BZIP60 0.001981 9.183647e-07 0.024823 0.021073 0.039007 0.021073 6 6.107958
TCP8 0.000315 7.071408e-05 0.010638 0.036398 0.014184 0.031609 6 6.093217
IDD1 0.000013 1.028568e-04 0.007092 0.032567 0.014184 0.035441 6 6.089400
SIGE 0.000025 2.846931e-05 0.014184 0.008621 0.049645 0.004789 6 6.077293
BRM 0.000013 1.836729e-06 0.007092 0.016284 0.031915 0.015326 6 6.070631
EBS 0.000013 2.442850e-04 0.014184 0.022989 0.014184 0.014368 6 6.065982
In [36]:
## Cortex specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
cor_betweenness_centrality cor_out_centrality cor_in_centrality centrality_count centrality_sum
AT2G38300 0.037341 0.049645 0.397163 3 3.484150
AT1G72210 0.012165 0.336879 0.535461 3 3.884506
KNAT3 0.002410 0.031915 0.088652 3 3.122978
ETR2 0.000909 0.102837 0.124113 3 3.227859
HK2 0.000278 0.024823 0.092199 3 3.117299
AGL16 0.000151 0.017730 0.021277 3 3.039159
PC-MYB1 0.000114 0.003546 0.017730 3 3.021390
ZFN1 0.000050 0.074468 0.131206 3 3.205724
SCL27 0.000038 0.131206 0.049645 3 3.180889
GLK2 0.000038 0.060284 0.294326 3 3.354648
HSFB3 0.000025 0.024823 0.074468 3 3.099316
AT1G68070 0.000025 0.049645 0.102837 3 3.152508
RGL3 0.000013 0.017730 0.322695 3 3.340438
DOT2 0.000013 0.003546 0.024823 3 3.028381
EIL1 0.000013 0.039007 0.127660 3 3.166679
AT1G63170 0.000013 0.010638 0.078014 3 3.088665
In [37]:
## Endodermis specific
## TFs regulating in exactly one network (this cell type's)
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[37]:
end_betweenness_centrality end_out_centrality end_in_centrality centrality_count centrality_sum
GATA15 2.213351e-02 0.018199 0.001916 3 3.042248
AT4G30180 8.862220e-03 0.050766 0.000958 3 3.060586
SCR 5.898657e-03 0.253831 0.133142 3 3.392872
HB23 4.132641e-03 0.001916 0.004789 3 3.010838
bZIP58 3.106828e-03 0.134100 0.244253 3 3.381459
... ... ... ... ... ...
SWC2 9.183647e-07 0.023946 0.023946 3 3.047894
AL3 9.183647e-07 0.008621 0.002874 3 3.011495
AT3G04930 9.183647e-07 0.020115 0.016284 3 3.036399
MAF3 9.183647e-07 0.022031 0.024904 3 3.046936
AP2 9.183647e-07 0.029693 0.024904 3 3.054599

129 rows × 5 columns

In [38]:
## Stele
## TFs regulating in exactly four networks, viewed across the stele types
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
## Count nonzero centralities per TF (max 12 = 3 measures x 4 cell types)
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[38]:
per_betweenness_centrality pro_betweenness_centrality xyl_betweenness_centrality phl_betweenness_centrality per_out_centrality pro_out_centrality xyl_out_centrality phl_out_centrality per_in_centrality pro_in_centrality xyl_in_centrality phl_in_centrality centrality_count centrality_sum
MYB88 0.044110 0.031265 0.078334 0.000225 0.069641 0.381847 0.568966 0.104510 0.103373 0.715180 0.172414 0.101210 12 14.371074
IAA9 0.000450 0.003272 0.000166 0.000142 0.030468 0.178404 0.580460 0.096810 0.040261 0.458529 0.143678 0.084708 12 13.617348
PHB 0.010258 0.000015 0.072221 0.000151 0.019587 0.100156 0.770115 0.024202 0.041349 0.370892 0.080460 0.022002 12 13.511409
UNE12 0.022240 0.007955 0.022025 0.000909 0.219804 0.250391 0.086207 0.053905 0.371055 0.162754 0.057471 0.062706 12 13.317424
In [39]:
## Pericycle
## TFs regulating in exactly one network (this cell type's)
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[39]:
per_betweenness_centrality per_out_centrality per_in_centrality centrality_count centrality_sum
IDD11 0.586374 0.026115 0.059848 3 3.672337
SAP 0.213124 0.003264 0.002176 3 3.218564
MGP 0.071957 0.029380 0.021763 3 3.123100
NUC 0.069583 0.025027 0.070729 3 3.165339
AT1G64620 0.058530 0.009793 0.020675 3 3.088997
HAT9 0.034638 0.093580 0.092492 3 3.220710
LBD14 0.033552 0.033732 0.022851 3 3.090135
GATA23 0.026943 0.010881 0.035909 3 3.073733
AT4G20970 0.007375 0.014146 0.007617 3 3.029138
ERF12 0.006133 0.093580 0.104461 3 3.204174
AT3G21330 0.001291 0.028292 0.002176 3 3.031759
AT2G20100 0.001285 0.082699 0.066376 3 3.150360
OFP7 0.001088 0.016322 0.023939 3 3.041349
AT4G26810 0.000038 0.003264 0.021763 3 3.025065
HDA3 0.000031 0.030468 0.032644 3 3.063143
SAC51 0.000013 0.039173 0.054407 3 3.093593
AIP3 0.000008 0.014146 0.004353 3 3.018507
AT4G25410 0.000001 0.032644 0.057671 3 3.090317
In [40]:
## Procambium
## TFs regulating in exactly one network (this cell type's)
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): sum includes the centrality_count column — confirm intended
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[40]:
pro_betweenness_centrality pro_out_centrality pro_in_centrality centrality_count centrality_sum
MBD02 0.002092 0.061033 0.015649 3 3.078775
AT3G60080 0.000025 0.062598 0.023474 3 3.086097
ARIA 0.000020 0.054773 0.010955 3 3.065747
CDC5 0.000007 0.045383 0.015649 3 3.061040
DOF1 0.000002 0.068858 0.023474 3 3.092334
In [41]:
## Xylem
celltype = 'xyl'
# The three network-centrality metrics for this cell type's GRN
metric_cols = [celltype + s for s in ('_betweenness_centrality', '_out_centrality', '_in_centrality')]
# Restrict to TFs that occur in exactly one cell-type network
cs = tf_spec.loc[tf_spec['tf_occurance'] == 1, metric_cols]
# How many of the three metrics are non-zero for each TF
cs['centrality_count'] = (cs > 0).sum(axis=1)
# NOTE(review): the row sum is taken AFTER centrality_count was added, so it
# includes that count (values sit ~3 above the raw metric total) — kept as-is.
cs['centrality_sum'] = cs.sum(axis=1)
# TFs with all three metrics > 0, ranked by betweenness centrality
cs[cs['centrality_count'] == 3].sort_values(
    ['centrality_count', celltype + '_betweenness_centrality'], ascending=False
)
Out[41]:
xyl_betweenness_centrality xyl_out_centrality xyl_in_centrality centrality_count centrality_sum
LBD31 0.897681 0.316092 0.482759 3 4.696532
VND7 0.895090 0.166667 0.850575 3 4.912331
AT1G66810 0.818284 0.178161 0.793103 3 4.789549
IAA6 0.801940 0.522989 0.517241 3 4.842170
VND2 0.795396 0.224138 0.867816 3 4.887350
MYB83 0.727493 0.235632 0.839080 3 4.802206
VND4 0.709920 0.086207 0.844828 3 4.640954
MYB46 0.685569 0.293103 0.902299 3 4.880971
VND5 0.584646 0.097701 0.839080 3 4.521427
XND1 0.530397 0.459770 0.310345 3 4.300512
AT1G68200 0.400405 0.270115 0.454023 3 4.124543
VND3 0.285330 0.172414 0.735632 3 4.193376
MYB99 0.266726 0.137931 0.178161 3 3.582818
HB31 0.235200 0.126437 0.293103 3 3.654741
ZHD3 0.231945 0.195402 0.770115 3 4.197462
NAC050 0.186001 0.224138 0.477011 3 3.887150
VND6 0.157730 0.114943 0.333333 3 3.606006
MYB52 0.126171 0.218391 0.522989 3 3.867550
VND1 0.089064 0.339080 0.367816 3 3.795960
LBD18 0.068334 0.402299 0.844828 3 4.315461
MYB85 0.065876 0.235632 0.339080 3 3.640589
AT1G68810 0.036077 0.948276 0.839080 3 4.823434
AP3 0.016610 0.241379 0.051724 3 3.309714
JLO 0.011029 0.379310 0.810345 3 4.200684
HAT14 0.005681 0.327586 0.017241 3 3.350508
AT1G26610 0.005614 0.028736 0.005747 3 3.040097
TCP20 0.005415 0.057471 0.028736 3 3.091622
In [42]:
## Phloem
celltype = 'phl'
# The three network-centrality metrics for this cell type's GRN
metric_cols = [celltype + s for s in ('_betweenness_centrality', '_out_centrality', '_in_centrality')]
# Restrict to TFs that occur in exactly one cell-type network
cs = tf_spec.loc[tf_spec['tf_occurance'] == 1, metric_cols]
# How many of the three metrics are non-zero for each TF
cs['centrality_count'] = (cs > 0).sum(axis=1)
# NOTE(review): the row sum is taken AFTER centrality_count was added, so it
# includes that count (values sit ~3 above the raw metric total) — kept as-is.
cs['centrality_sum'] = cs.sum(axis=1)
# TFs with all three metrics > 0, ranked by betweenness centrality
cs[cs['centrality_count'] == 3].sort_values(
    ['centrality_count', celltype + '_betweenness_centrality'], ascending=False
)
Out[42]:
phl_betweenness_centrality phl_out_centrality phl_in_centrality centrality_count centrality_sum
AT3G12730 0.135933 0.770077 0.775578 3 4.681587
NAC057 0.052057 0.551155 0.444444 3 4.047656
HCA2 0.026417 0.440044 0.320132 3 3.786593
HB-2 0.025951 0.008801 0.008801 3 3.043552
DAR2 0.025764 0.584158 0.510451 3 4.120374
... ... ... ... ... ...
ATWHY2 0.000004 0.024202 0.029703 3 3.053909
AT4G00270 0.000002 0.023102 0.026403 3 3.049507
NFXL2 0.000002 0.015402 0.011001 3 3.026405
FLP 0.000002 0.070407 0.059406 3 3.129815
AT4G23860 0.000002 0.002200 0.004400 3 3.006603

123 rows × 5 columns

Search for individual genes¶

In [43]:
gene = 'SHR'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[43]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality per_degree_centrality per_out_centrality per_in_centrality per_betweenness_centrality per_closeness_centrality per_eigenvector_centrality pro_degree_centrality pro_out_centrality pro_in_centrality pro_closeness_centrality pro_eigenvector_centrality
SHR 2.0 0.012452 0.009579 0.002874 9.183647e-07 0.00025 0.009533 0.050054 0.031556 0.018498 0.001145 0.000255 0.02326 0.079812 0.054773 0.025039 0.000223 0.038868
In [44]:
gene = 'BLJ'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[44]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality per_degree_centrality per_out_centrality per_in_centrality per_betweenness_centrality per_closeness_centrality per_eigenvector_centrality
BLJ 3.0 0.230496 0.223404 0.007092 0.007824 0.000255 0.069038 0.921456 0.413793 0.507663 0.014896 0.000455 0.123471 0.201306 0.198041 0.003264 0.018165 0.000349 0.066549
In [45]:
gene = 'JKD'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[45]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
JKD 2.0 0.840426 0.588652 0.251773 0.001363 0.000312 0.134186 0.505747 0.287356 0.218391 0.00788 0.000442 0.099119
In [46]:
gene = 'RVN'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[46]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
RVN 1.0 0.087165 0.048851 0.038314 0.000003 0.000361 0.037416
In [47]:
gene = 'BIB'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[47]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
BIB 1.0 0.241379 0.140805 0.100575 0.001481 0.000415 0.068826
In [48]:
gene = 'IME'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
# (Empty output here means the gene is absent from tf_spec's index.)
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
row[row.columns[row.any()]]
Out[48]:
In [49]:
gene = 'MYB66'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
# (Empty output here means the gene is absent from tf_spec's index.)
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
row[row.columns[row.any()]]
Out[49]:
In [50]:
gene = 'GL2'
# Single-row slice for this gene; the original recomputed this filter three
# times in one expression — factor it out once for readability.
row = tf_spec[tf_spec.index == gene]
# Show only the columns where the gene has a non-zero entry
# (i.e. the cell-type networks it participates in)
row[row.columns[row.any()]]
Out[50]:
tf_occurance atri_degree_centrality atri_out_centrality atri_in_centrality atri_betweenness_centrality atri_closeness_centrality atri_eigenvector_centrality lrc_degree_centrality lrc_out_centrality lrc_in_centrality lrc_betweenness_centrality lrc_closeness_centrality lrc_eigenvector_centrality
GL2 2.0 0.527273 0.446753 0.080519 0.697348 0.000764 0.120714 0.193939 0.123232 0.070707 0.591539 0.00069 0.067474
In [51]:
tf_spec.to_csv('TF_GRN_centrality_t5-t7_zscore3.csv', index=True)
In [ ]: